Philipp Daiss, Tobias Schmitt, Jens Stöhr und Peter Kowalczyk
require(tidyverse)
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 3.2.5
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.2.5
## Warning: package 'tibble' was built under R version 3.2.5
## Warning: package 'tidyr' was built under R version 3.2.5
## Warning: package 'readr' was built under R version 3.2.5
## Warning: package 'purrr' was built under R version 3.2.5
## Warning: package 'dplyr' was built under R version 3.2.5
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
require(rvest)
## Loading required package: rvest
## Warning: package 'rvest' was built under R version 3.2.5
## Loading required package: xml2
## Warning: package 'xml2' was built under R version 3.2.5
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
require(jsonlite)
## Loading required package: jsonlite
## Warning: package 'jsonlite' was built under R version 3.2.5
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
require(ggmap)
## Loading required package: ggmap
## Warning: package 'ggmap' was built under R version 3.2.5
# Scrape the listing hrefs from the first 6 result pages for Würzburg.
# The hard-coded page count is fragile (see getApartment() below for the
# robust, pager-driven version); 6 pages are kept for identical behavior.
url_prefix <- "https://www.immobilienscout24.de/Suche/S-T/P-"
url_suffix <- "/Wohnung-Miete/Bayern/Wuerzburg?pagerReporting=true"
n_pages <- 6
# Preallocate one slot per page instead of growing a vector with c() in the loop.
page_results <- vector("list", n_pages)
for (i in seq_len(n_pages)) {
  page_url <- paste0(url_prefix, i, url_suffix)
  page_results[[i]] <- page_url %>%
    read_html() %>%
    html_nodes(".result-list-entry__brand-title-container") %>%
    html_attr("href")
}
# Flatten to the same character vector the original c()-accumulation produced.
allResults <- unlist(page_results)
i steht für die möglichen Seiten der Suche; url1, url2 und der Zähler i bilden zusammen die URL. Daraus entsteht eine Abfrage, die den HTML-Code einliest und nach Elementen mit der Klasse 'result-list-entry__brand-title-container' sucht. Aus diesen wird das Attribut href ausgelesen und durch Anhängen an einen Vektor in die Variable allResults geschrieben. Probleme treten in zwei Richtungen auf: Gibt es weniger Angebote (und damit weniger Seiten), kommt es zu einer Fehlermeldung; gibt es mehr Seiten, werden nicht alle Angebote erfasst.
# Scrape all rental-listing hrefs for a Bavarian city from immobilienscout24.
#
# @param city City name; German umlauts are transliterated (ä→ae, ü→ue, ö→oe)
#   so the name fits the site's URL scheme.
# @return Character vector of relative "/expose/<id>" links, or NULL when the
#   page selector yields no pages (matches the original NULL-accumulator).
getApartment <- function(city) {
  base_search_url <- "https://www.immobilienscout24.de/Suche/S-T/Wohnung-Miete/Bayern/"
  # Transliterate umlauts for the URL.
  city <- gsub("ä", "ae", city, fixed = TRUE)
  city <- gsub("ü", "ue", city, fixed = TRUE)
  city <- gsub("ö", "oe", city, fixed = TRUE)
  first_url <- paste0(base_search_url, city)
  # Read the pager drop-down: one <option value="..."> per result page.
  pageSelektor <- first_url %>%
    read_html() %>%
    html_nodes("#pageSelection select option") %>%
    html_attr("value")
  main_url <- "https://www.immobilienscout24.de"
  # seq_along() (not 1:length()) so an empty selector yields zero iterations
  # instead of the bogus indices 1 and 0; preallocate instead of growing with c().
  pages <- vector("list", length(pageSelektor))
  for (i in seq_along(pageSelektor)) {
    pages[[i]] <- paste0(main_url, pageSelektor[i]) %>%
      read_html() %>%
      html_nodes(".result-list-entry__brand-title-container") %>%
      html_attr("href")
  }
  unlist(pages)
}
# Collect the listing links for four Bavarian cities (one network crawl each).
ApartmentWue <- getApartment('Wuerzburg')
ApartmentBam <- getApartment('Bamberg')
ApartmentBay <- getApartment('Bayreuth')
ApartmentAug <- getApartment('Augsburg')
# Print the Würzburg links.
ApartmentWue
## [1] "/expose/94818251" "/expose/95187153" "/expose/87409778"
## [4] "/expose/95679720" "/expose/87152588" "/expose/95568270"
## [7] "/expose/95028544" "/expose/94846454" "/expose/95404536"
## [10] "/expose/95785022" "/expose/95923053" "/expose/95913899"
## [13] "/expose/95873535" "/expose/95867642" "/expose/95840545"
## [16] "/expose/95832659" "/expose/95831007" "/expose/95813960"
## [19] "/expose/95806539" "/expose/95805058" "/expose/95802506"
## [22] "/expose/95779345" "/expose/95709146" "/expose/95688766"
## [25] "/expose/95688642" "/expose/95620872" "/expose/95541644"
## [28] "/expose/95475395" "/expose/95465588" "/expose/95412519"
## [31] "/expose/95297797" "/expose/95260941" "/expose/95217270"
## [34] "/expose/95082027" "/expose/95074701" "/expose/95067986"
## [37] "/expose/94995948" "/expose/94943106" "/expose/94942637"
## [40] "/expose/94847683" "/expose/94826185" "/expose/94809562"
## [43] "/expose/94808936" "/expose/94806688" "/expose/94757401"
## [46] "/expose/94700676" "/expose/94612291" "/expose/94470867"
## [49] "/expose/94442425" "/expose/94364931" "/expose/94304886"
## [52] "/expose/93860417" "/expose/93808937" "/expose/93794011"
## [55] "/expose/93755640" "/expose/93372905" "/expose/93370302"
## [58] "/expose/93090839" "/expose/93069103" "/expose/93061387"
## [61] "/expose/93059945" "/expose/92668262" "/expose/92665654"
## [64] "/expose/91517844" "/expose/91075291" "/expose/90830060"
## [67] "/expose/90481624" "/expose/90257861" "/expose/90041927"
## [70] "/expose/89766602" "/expose/89466269" "/expose/88932047"
## [73] "/expose/88911037" "/expose/88765818" "/expose/88349589"
## [76] "/expose/86808654" "/expose/86808615" "/expose/85625092"
## [79] "/expose/85252877" "/expose/84990183" "/expose/84974867"
## [82] "/expose/83972017" "/expose/83963661" "/expose/83944141"
## [85] "/expose/82856550" "/expose/82856534" "/expose/82856532"
## [88] "/expose/82856444" "/expose/82856418" "/expose/82856416"
## [91] "/expose/82856337" "/expose/82856273" "/expose/82856247"
## [94] "/expose/82856239" "/expose/81421109" "/expose/75514953"
## [97] "/expose/72267066" "/expose/48128650" "/expose/95837675"
## [100] "/expose/95832263" "/expose/95829876"
ApartmentBam
## [1] "/expose/93557573" "/expose/94751148" "/expose/68045960"
## [4] "/expose/95125095" "/expose/94758512" "/expose/94925272"
## [7] "/expose/94925414" "/expose/95831108" "/expose/87633975"
## [10] "/expose/95569057" "/expose/95283775" "/expose/79888195"
## [13] "/expose/95987141" "/expose/95950007" "/expose/95912942"
## [16] "/expose/95807676" "/expose/95807575" "/expose/95781655"
## [19] "/expose/95761887" "/expose/95759522" "/expose/95756468"
## [22] "/expose/95736982" "/expose/95736980" "/expose/95736973"
## [25] "/expose/95736383" "/expose/95683996" "/expose/95662182"
## [28] "/expose/95588993" "/expose/95540676" "/expose/95470826"
## [31] "/expose/95449890" "/expose/95406557" "/expose/95308418"
## [34] "/expose/95203430" "/expose/95185103" "/expose/95155373"
## [37] "/expose/95050027" "/expose/94957584" "/expose/94920612"
## [40] "/expose/94865300" "/expose/94600543" "/expose/94536613"
## [43] "/expose/94535025" "/expose/94403688" "/expose/94160526"
## [46] "/expose/93687739" "/expose/93446459" "/expose/92922697"
## [49] "/expose/92911036" "/expose/92282596" "/expose/90573621"
## [52] "/expose/89488397" "/expose/89206439" "/expose/85758955"
## [55] "/expose/85304729" "/expose/84004573" "/expose/80313193"
## [58] "/expose/79872456" "/expose/78761604" "/expose/76625529"
## [61] "/expose/75018974" "/expose/69651900" "/expose/57832571"
## [64] "/expose/89855806" "/expose/84445618" "/expose/84292856"
## [67] "/expose/83149017" "/expose/83004803" "/expose/82860886"
## [70] "/expose/66104508" "/expose/50730259" "/expose/35888561"
Wäre auch als Schleife möglich, jedoch wären Ausnahmen für Bonität und Internet nötig.
# Scrape one immobilienscout24 detail page ("/expose/<id>") and return a
# one-row tibble with the listing's fields.
#
# detailURL: relative link of the form "/expose/<id>" as produced by
#   getApartment().
# Returns: tibble with exposeID, street/district, geocoded longitude/latitude,
#   and the raw string-valued price/size/feature fields (NA where a field is
#   missing on the page).
getApartmentDetails<-function(detailURL){
# The expose id is the numeric tail of the relative link.
exposeID<- gsub('/expose/','',detailURL)
detailURL <- paste0('https://www.immobilienscout24.de',detailURL)
# From here on, detailURL holds the parsed HTML document, not the URL string.
detailURL<- detailURL %>%
read_html()
# Cold rent: strip spaces and the trailing currency character.
# NOTE(review): the gsub pattern looks like a plain space here; if the site
# uses non-breaking spaces in prices, confirm the original pattern character.
kaltmiete<-detailURL %>%
html_nodes(".is24qa-kaltmiete") %>%
html_text()
kaltmiete<-gsub(pattern = ' ', replacement = "",kaltmiete[1])
kaltmiete<-substr(kaltmiete, 1, nchar(kaltmiete)-1)
if(length(kaltmiete)==0){
kaltmiete=NA
}
# Total rent, same cleanup as above.
gesamtmiete <-detailURL %>%
html_nodes(".is24qa-gesamtmiete") %>%
html_text()
gesamtmiete<-gsub(pattern = ' ', replacement = "",gesamtmiete)
# Remove any parenthesised addition, e.g. "(zzgl. ...)".
gesamtmiete<-gsub(pattern = '\\([^\\)]*\\)', replacement = "",gesamtmiete)
gesamtmiete<-substr(gesamtmiete, 1, nchar(gesamtmiete)-1)
if(length(gesamtmiete)==0){
gesamtmiete=NA
}
# Ancillary costs: 0 when cold rent equals total rent, otherwise scraped.
# NOTE(review): if kaltmiete or gesamtmiete is NA this `if` condition is NA
# and the call errors — consider guarding with is.na() first.
if(kaltmiete==gesamtmiete){
nebenkosten='0'
}else{
nebenkosten<-detailURL %>%
html_nodes(".is24qa-nebenkosten") %>%
html_text()
nebenkosten<-gsub(pattern = '[+]', replacement = "",nebenkosten)
nebenkosten<-gsub(pattern = ' ', replacement = "",nebenkosten)
nebenkosten<-substr(nebenkosten, 1, nchar(nebenkosten)-1)
# "keine Angabe" (no information) becomes NA after the space-stripping above.
nebenkosten<-gsub(pattern = 'keineAngab', replacement = NA,nebenkosten)
if(length(nebenkosten)==0){
nebenkosten=NA
}
}
# Number of rooms.
zimmer<-detailURL %>%
html_nodes(".is24qa-zimmer") %>%
html_text()
zimmer<-gsub(pattern = ' ', replacement = "",zimmer)
if(length(zimmer)==0){
zimmer=NA
}
# Living area (still carries the unit suffix; stripped later in WueDf).
flaeche<-detailURL %>%
html_nodes(".is24qa-wohnflaeche-ca") %>%
html_text()
flaeche<-gsub(pattern = ' ', replacement = "",flaeche)
if(length(flaeche)==0){
flaeche=NA
}
# Apartment type.
wohnungstyp<-detailURL %>%
html_nodes(".is24qa-wohnungstyp") %>%
html_text()
wohnungstyp<-gsub(pattern = ' ', replacement = "",wohnungstyp)
if(length(wohnungstyp)==0){
wohnungstyp=NA
}
# Pets allowed?
haustiere<-detailURL %>%
html_nodes(".is24qa-haustiere") %>%
html_text()
haustiere<-gsub(pattern = ' ', replacement = "",haustiere)
if(length(haustiere)==0){
haustiere=NA
}
# Garage / parking space type.
parkplatz<-detailURL %>%
html_nodes(".is24qa-garage-stellplatz") %>%
html_text()
parkplatz<-gsub(pattern = ' ', replacement = "",parkplatz)
if(length(parkplatz)==0){
parkplatz=NA
}
# Number of parking spaces.
anzahlParkplatz<-detailURL %>%
html_nodes(".is24qa-anzahl-garage-stellplatz") %>%
html_text()
anzahlParkplatz<-gsub(pattern = ' ', replacement = "",anzahlParkplatz)
if(length(anzahlParkplatz)==0){
anzahlParkplatz=NA
}
# District: 4th breadcrumb link — assumes the breadcrumb layout is stable.
district<-detailURL %>%
html_nodes(".breadcrumb__link") %>%
html_text()
district<-gsub(pattern = ' ', replacement = "",district[4])
# Street address: 2nd address block; collapse whitespace and drop the
# "full address only from the provider" placeholder text.
street<-detailURL %>%
html_nodes(".address-block") %>%
html_text()
street <-street[2]
street <-gsub(pattern = "[ ]{1,}"," ",street)
street <- gsub('Die vollständige Adresse der Immobilie erhalten Sie vom Anbieter.',"",street)
streetName<- trimws(street)
# Drop a trailing ", <word>" suffix (presumably the district) from the name.
streetName <-gsub(pattern=", [^ ]+$","",streetName)
# URL-encode spaces for the geocoding request.
street<-gsub(" ","+",streetName)
# Geocode via Google Maps; on success result is c(lng, lat).
# NOTE(review): on error the handler returns the condition object, so `result`
# is NOT NULL and the else-branch below stores pieces of the error in
# longitude/latitude — consider returning NULL from the handler instead.
result = tryCatch({
geocode = fromJSON(paste0("https://maps.googleapis.com/maps/api/geocode/json?address=",street,"+,Germany"))
result<- c(geocode$results$geometry$location$lng[1],geocode$results$geometry$location$lat[1])
}, error = function(e) {
e
})
if(is.null(result)){
longitude<- NA
latitude<- NA
}else{
longitude<- result[1]
latitude<- result[2]
}
# Assemble the one-row result; all scraped fields are kept as strings here.
return (tibble(exposeID=exposeID,
strasse =streetName,
stadtteil=district,
latitude=latitude,
longitude=longitude,
kaltmiete=kaltmiete,
nebenkosten=nebenkosten,
gesamtmiete=gesamtmiete,
zimmer=zimmer,
flaeche=flaeche,
wohnungstyp=wohnungstyp,
haustiere=haustiere,
parkplatz=parkplatz,
anzahlParkplatz=anzahlParkplatz
)
)
}
Vorbereitung
# Scrape the detail page of every Würzburg listing into one tibble
# (one row per expose; network call per listing).
WueDf<- map_df(ApartmentWue,getApartmentDetails)
WueDf
## # A tibble: 101 × 14
## exposeID strasse stadtteil
## <chr> <chr> <chr>
## 1 94818251 Ruppertsgasse 4, 97084 Würzburg Heidingsfeld
## 2 95187153 97084 Würzburg Heidingsfeld
## 3 87409778 Rottendorfer Straße 51/53/55, 97074 Würzburg Mönchberg
## 4 95679720 97072 Würzburg Dom
## 5 87152588 Rottendorfer Straße 53a, 97074 Würzburg Mönchberg
## 6 95568270 Wöllergasse 2, 97070 Würzburg Neumünster
## 7 95028544 97082 Würzburg Mainviertel
## 8 94846454 Leistenstrasse 63, 97082 Würzburg Nikolausberg
## 9 95404536 97070 Würzburg Dom
## 10 95785022 Sartoriusstraße 3, 97072 Würzburg Rennweg
## # ... with 91 more rows, and 11 more variables: latitude <dbl>,
## # longitude <dbl>, kaltmiete <chr>, nebenkosten <chr>,
## # gesamtmiete <chr>, zimmer <chr>, flaeche <chr>, wohnungstyp <chr>,
## # haustiere <chr>, parkplatz <chr>, anzahlParkplatz <chr>
# Normalize the total rent: drop the thousands separator ".".
WueDf$gesamtmiete<-gsub(pattern = '\\.', replacement = "",WueDf$gesamtmiete)
# NOTE(review): format() on a character vector only pads with spaces (digits/
# small.mark are ignored for character input), and the padding is stripped
# again below — this call appears to be a no-op; confirm before removing.
WueDf$gesamtmieteNeu<- format(WueDf$gesamtmiete, digits=2, decimal.mark=".",
small.mark="+", small.interval=3)
WueDf$gesamtmieteNeu<-gsub(pattern = ' ', replacement = "",WueDf$gesamtmieteNeu)
# Decimal comma -> decimal point, then parse as numeric.
WueDf$gesamtmieteNeu<-gsub(pattern = ',', replacement = ".",WueDf$gesamtmieteNeu)
WueDf$gesamtmieteNeu <- as.double(WueDf$gesamtmieteNeu)
# Living area: strip the trailing 2-character unit suffix, spaces and the
# decimal comma, then parse as numeric.
WueDf$flaecheNeu<-substr(WueDf$flaeche, 1, nchar(WueDf$flaeche)-2)
WueDf$flaecheNeu<-gsub(pattern = ' ', replacement = "",WueDf$flaecheNeu)
WueDf$flaecheNeu<-gsub(pattern = ',', replacement = ".",WueDf$flaecheNeu)
WueDf$flaecheNeu <- as.double(WueDf$flaecheNeu)
# Price per square metre.
WueDf$preisQm <- (WueDf$gesamtmieteNeu/WueDf$flaecheNeu)
Visualisierung als Boxplot
# Boxplot of price per square metre, one box per district; district names are
# shortened on the axis via abbreviate.
g2 <- ggplot(WueDf, aes(x = stadtteil, y = preisQm)) +
  geom_boxplot() +
  scale_x_discrete(labels = abbreviate)
g2
## Warning in f(...): abbreviate mit nicht-ASCII Zeichen genutzt
Visualisierung als Karte. Farbliche Zuordnung der Stadtteile + Preisniveau
# Price bands for the map labels. The assignments cascade, so each row ends
# up with the label of the highest threshold its preisQm exceeds:
# ' ' (<=10), '+' (>10), '++' (>14), '+++' (>19).
WueDf$preisniveau <-' '
WueDf$preisniveau[WueDf$preisQm >10] <-'+'
WueDf$preisniveau[WueDf$preisQm >14] <-'++'
WueDf$preisniveau[WueDf$preisQm >19] <-'+++'
# Google hybrid base map centred on Würzburg.
basemap <- get_map("Wuerzburg",
source = "google",
maptype = "hybrid",
zoom = 12)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Wuerzburg&zoom=12&size=640x640&scale=2&maptype=hybrid&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Wuerzburg&sensor=false
# Scatter the listings on the base map: colour = district, text = price band.
# The original passed a stray title(main = "New Orleans ...") call as an extra
# argument to ggplot() and a non-existent title= argument to ggmap(); both
# copy-paste leftovers are removed.
g <- ggplot(WueDf, aes(x = longitude, y = latitude))
map <- ggmap(basemap, base_layer = g)
map <- map + geom_point(size = 2, aes(color = stadtteil))
map <- map + ggtitle('Lage und Qm Preis ')
map <- map + xlab(" >9 = +, 14> = ++ , > 19 = +++")
map <- map + ylab("")
# Refer to the column by name inside aes(); data = WueDf already supplies it
# (WueDf$preisniveau inside aes() is the classic ggplot anti-pattern).
map <- map + geom_text(data = WueDf, check_overlap = TRUE, aes(label = preisniveau), hjust = 0, color = "white")
map
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).
#Aufgabe 2: Comey is going down
require(xlsx)
## Loading required package: xlsx
## Warning: package 'xlsx' was built under R version 3.2.5
## Loading required package: rJava
## Warning: package 'rJava' was built under R version 3.2.5
## Loading required package: xlsxjars
## Warning: package 'xlsxjars' was built under R version 3.2.5
require(ggplot2)
require(tidyverse)
require(RGraphics)
## Loading required package: RGraphics
## Warning: package 'RGraphics' was built under R version 3.2.5
## Loading required package: grid
require(ggrepel)
## Loading required package: ggrepel
## Warning: package 'ggrepel' was built under R version 3.2.5
# Read the PredictIt price history (local Excel export).
comey_predictit <- read.xlsx("C:/Users/Tobi/Dropbox/Data Science/DataScience/Problemset2/comey-predictit.xlsx",sheetIndex = 1)
# New data frame with the average share price per day. Columns are referenced
# by bare name inside mutate() (not comey_predictit$col, which bypasses the
# pipeline and silently breaks if an earlier step changes the data).
# DateString becomes a factor in reverse sheet order so coord_flip() later
# shows the dates in chronological order.
AvgPrices <- comey_predictit %>%
  mutate(AvgSharePrice = (OpenSharePrice + CloseSharePrice) / 2) %>%
  mutate(DateString = factor(DateString, levels = rev(unique(DateString))))
# Build the likelihood chart; coord_flip() puts the dates on the vertical axis.
# NOTE(review): aes(col = "red") and labs(color = "red") are leftovers — the
# line colour is set per-geom below; kept to preserve the rendered output.
p <- ggplot(AvgPrices, aes(DateString, AvgSharePrice, group = 1, col="red")) +
  geom_line(color = "deepskyblue", size = 1.7)+
  coord_flip () +
  # Reference lines at 20/40/60/80/100 %.
  geom_hline(yintercept = 20, color = "steelblue") +
  geom_hline(yintercept = 40, color = "steelblue") +
  geom_hline(yintercept = 60, color = "steelblue") +
  geom_hline(yintercept = 80, color = "steelblue") +
  geom_hline(yintercept = 100, color = "steelblue") +
  labs(title = "PredictIt market on whether Mr Comey would keep his job until June 30th 2017", x = "2017", y = "Likelihood, %", color="red") +
  scale_y_continuous(position="top",breaks=seq(0, 100, by=20),sec.axis = dup_axis(name = waiver())) +
  # Replace the per-day date ticks with one centred month label each; all other
  # days map to "". Six duplicate "2017-03-02" entries from the original were
  # removed — the date already maps to "" above, so rendering is unchanged.
  scale_x_discrete("2017", labels = c("2017-02-13" = "","2017-02-14" = "","2017-02-15" = "","2017-02-16" = "","2017-02-17" = "","2017-02-18" = "",
"2017-02-19" = "","2017-02-20" = "Feb","2017-02-21" = "","2017-02-22" = "","2017-02-23" = "","2017-02-24" = "",
"2017-02-25" = "","2017-02-26" = "","2017-02-27" = "","2017-02-28" = "","2017-03-01" = "","2017-03-02" = "",
"2017-03-03" = "","2017-03-04" = "","2017-03-05" = "","2017-03-06" = "","2017-03-07" = "","2017-03-08" = "",
"2017-03-09" = "","2017-03-10" = "","2017-03-11" = "","2017-03-12" = "","2017-03-13" = "","2017-03-14" = "",
"2017-03-15" = "Mar","2017-03-16" = "","2017-03-17" = "","2017-03-18" = "","2017-03-19" = "","2017-03-20" = "",
"2017-03-21" = "","2017-03-22" = "","2017-03-23" = "","2017-03-24" = "","2017-03-25" = "","2017-03-26" = "",
"2017-03-27" = "","2017-03-28" = "","2017-03-29" = "","2017-03-30" = "","2017-03-31" = "","2017-04-01" = "",
"2017-04-02" = "","2017-04-03" = "","2017-04-04" = "","2017-04-05" = "","2017-04-06" = "","2017-04-07" = "",
"2017-04-08" = "","2017-04-09" = "","2017-04-10" = "","2017-04-11" = "","2017-04-12" = "","2017-04-13" = "",
"2017-04-14" = "","2017-04-15" = "Apr","2017-04-16" = "","2017-04-17" = "","2017-04-18" = "","2017-04-19" = "",
"2017-04-20" = "","2017-04-21" = "","2017-04-22" = "","2017-04-23" = "","2017-04-24" = "","2017-04-25" = "",
"2017-04-26" = "","2017-04-27" = "","2017-04-28" = "","2017-04-29" = "","2017-04-30" = "","2017-05-01" = "",
"2017-05-02" = "","2017-05-03" = "","2017-05-04" = "","2017-05-05" = "","2017-05-06" = "","2017-05-07" = "May",
"2017-05-08" = "","2017-05-09" = "","2017-05-10" = "","2017-05-11" = "","2017-05-12" = "","2017-05-13" = "")) +
  # Minimal theme: no grid, white background.
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_rect(fill ="white"))
# Annotations: each event is one red point plus a bold, repelled caption.
# The six copy-pasted stanzas are factored into one helper; the label strings
# are byte-identical to the original (including their typos and line breaks).
#
# @param plot ggplot object to extend.
# @param day DateString value selecting the event's row(s) in AvgPrices.
# @param label Caption text (passed directly, not via aes()).
# @param nudge_x,nudge_y Label offsets, as in geom_text_repel().
annotate_event <- function(plot, day, label, nudge_x, nudge_y) {
  event <- subset(AvgPrices, DateString == day)
  plot +
    geom_point(data = event, color = "red") +
    geom_text_repel(data = event, label = label, fontface = 'bold', color = 'black',
                    nudge_x = nudge_x, nudge_y = nudge_y,
                    point.padding = unit(1, "lines"), segment.color = 'red')
}
p <- annotate_event(p, "2017-03-03", "Democrats in Congress criticise Mr Comey
for his differing approches to handling
the cases involving Mr Trump and Mrs Clinton", 18, -50)
p <- annotate_event(p, "2017-03-06", "Mr Trump accuses Barack Obama's administration
of wiretapping his phones during the presidential campaign", 5, -40)
p <- annotate_event(p, "2017-03-19", "Mr Comey testifies to Congress. He dismisses
Mr Trump's wiretap claims, and confirms the FBI is investigating
links between Russia and Mr Trump's campaign", 0, -50)
p <- annotate_event(p, "2017-04-13", "The media reports that the FBI obtained a
warrant to investigate links between Carter Page, an informal
adviser to Mr Trump, and Russian officials", 10, -55)
p <- annotate_event(p, "2017-05-04", "Mr Comey gives testimony saying he felt 'middly
nauseous' at the thoght that the FBI probe of Mrs Clinton may have affected
the election. He also says the FBI found 'hundreds of thousands' of e-mails
relating to Mrs Clinton on Mr Weiner's laptop", 20, -55)
p <- annotate_event(p, "2017-05-13", "The FBI issues a statement correcting Mr Comey's
testimony on May 3rd, saying the e-mails found on Mr Weiner's laptop mostly resulted
from a backup process. Mr Trump sacks Mr Comey, citing the advice of the attorney-general,
over his handling of the Clinton investigation", 10, 5)
# Assign colours and styles to the title and axis titles.
p <- p + theme(
  plot.title = element_text(color="deepskyblue", size=14, face="bold"),
  axis.title.x = element_text(color="black", size=11, hjust=1, vjust =1),
  axis.title.y = element_text(color="red", size=14, face="bold", angle = 360)
)
p
require(ggrepel)
require(ggmap)
require(tidyverse)
require(WikidataR)
## Loading required package: WikidataR
require(WikidataQueryServiceR)
## Loading required package: WikidataQueryServiceR
#DataFrame aus Wikidata erstellen
# SPARQL query: all UNESCO World Heritage sites (wd:Q9259) located in Germany
# (wd:Q183), with the coordinate split into ?lat / ?lon. The query string is
# hoisted into a named variable; its content is byte-identical.
heritage_query <- '#List of World Heritages in Germany
#defaultView:Map
SELECT DISTINCT ?heritage ?heritageLabel ?lat ?lon
WHERE
{
?heritage wdt:P1435 wd:Q9259 ;
wdt:P17 wd:Q183 ;
wdt:P625 ?coord .
?heritage p:P625 ?coordinate .
?coordinate psv:P625 ?coordinate_node .
?coordinate_node wikibase:geoLatitude ?lat .
?coordinate_node wikibase:geoLongitude ?lon .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en, de". }
}
ORDER BY ?date'
World_Heritages <- query_wikidata(heritage_query)
## 86 rows were returned by WDQS
# Remove the two duplicate rows present in the data set (same entity listed
# with two different longitudes).
World_Heritages <- World_Heritages[!duplicated(World_Heritages$heritage),]
World_Heritages <- World_Heritages[!duplicated(World_Heritages$heritageLabel),]
# Drop entries that distort the map:
# - "prehistoric pile dwellings around the Alps" (optional): Wikidata picks
#   the alphabetically first of the many pile-dwelling sites as its location.
# - (recommended) two sites that Wikidata assigns to Germany (among other
#   countries) but whose coordinates lie abroad; keeping them yields a warning
#   because they fall outside the Germany-zoomed map.
excluded_labels <- c(
  "prehistoric pile dwellings around the Alps",
  "Borders of the Roman Empire",
  "Primeval Beech Forests of the Carpathians and the Ancient Beech Forests of Germany"
)
# One vectorised %in% filter replaces three subset() calls that each used the
# World_Heritages$heritageLabel anti-pattern inside subset().
World_Heritages <- subset(World_Heritages, !(heritageLabel %in% excluded_labels))
# First mapping option: get_map(), as suggested on the exercise sheet. Country
# and city names are drawn as well, which makes the figure busier; the second
# mapping option further below is preferred.
# NOTE(review): the original call passed force = ifelse(source == "google",
# TRUE, TRUE) — comparing the base function `source` with a string — and an
# undefined `api_key`; both only went unnoticed thanks to lazy evaluation.
# Replaced with plain, valid arguments carrying the same effective values.
map_ggmap <- get_map(location = 'germany', zoom = 6,
                     scale = "auto", maptype = "terrain", source = "google",
                     force = TRUE, messaging = FALSE, urlonly = FALSE,
                     filename = "ggmapTemp", crop = TRUE, color = "color",
                     language = "de-DE")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=germany&zoom=6&size=640x640&scale=2&maptype=terrain&language=de-DE&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=germany&sensor=false
# Plot the heritage sites on the map by longitude/latitude. Two entries were
# removed above because they lie outside the map although Wikidata assigns
# them to Germany (the same applies to p2 below).
# Columns are referenced by name inside aes(); data = World_Heritages already
# supplies them (the `$` extraction inside aes() is an anti-pattern).
p <- ggmap(map_ggmap) + geom_point(
  aes(x = lon, y = lat), col = "red", alpha = 0.4, size = 2,
  data = World_Heritages)
# Map with a point for every UNESCO World Heritage site in Germany.
p
# Three labelling options:
# FAVOURITE of the variants!
# geom_text_repel tries to prevent the labels from overlapping. This only
# becomes clearly visible in an exported image of roughly 1500x1500 pixels,
# not in the R plot preview (see Grafik_1 in "Anhang_Aufgabe_3").
# Fixes vs. the original: columns referenced by name inside aes() instead of
# World_Heritages$..., and the mojibake "?" in two annotation strings restored
# to "ä" ("Weltnaturerbestätten" / "Weltkulturerbestätten").
p + geom_text_repel(data = World_Heritages, aes(label = heritageLabel, fontface = "bold"), box.padding = unit(0.3, "lines")) +
  annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
           col="black", cex=6,
           fontface = "bold", alpha = 0.8)
# geom_text with check_overlap only plots a subset of the labels, so this
# variant is less suitable.
p + geom_text(data = World_Heritages, aes(label = heritageLabel, fontface = "bold"), check_overlap=TRUE) +
  annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
           col="black", cex=6,
           fontface = "bold", alpha = 0.8)
# geom_label looks nice, but overlapping cannot (as far as we know) be avoided.
p + geom_label(data = World_Heritages, aes(label = heritageLabel, fontface = "bold")) +
  annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
           col="black", cex=6,
           fontface = "bold", alpha = 0.8)
# Second mapping option without city and country names (clearer to read):
# build the map with get_googlemap from ggmap; the style string simplifies
# roads and hides locality and country labels.
map_googlemap <- get_googlemap(center = c(10.25828, 51.11484), zoom = 6, maptype = "terrain",
style = 'feature:road|element:all|visibility:simplified&style=feature:administrative.locality|element:labels|visibility:off&style=feature:administrative.country|element:labels|visibility:off')
# Plot the heritage sites by longitude/latitude; columns referenced by name
# inside aes() instead of the `$` extraction anti-pattern.
p2 <- ggmap(map_googlemap) + geom_point(
  aes(x = lon, y = lat), col = "red", alpha = 0.4, size = 2,
  data = World_Heritages)
# Map with a point for every UNESCO World Heritage site in Germany.
p2
# Three labelling options (with an annotation as the heading):
# FAVOURITE of the variants! (see Grafik_2 in "Anhang_Aufgabe_3")
# Columns referenced by name inside aes() instead of World_Heritages$... .
p2 + geom_text_repel(data = World_Heritages, aes(label = heritageLabel, fontface = "bold")) +
  annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
           col="black", cex=6,
           fontface = "bold", alpha = 0.8)
# As above with the new map: the labels look nice, but overlapping cannot
# (as far as we know) be avoided with geom_label.
p2 + geom_label(data = World_Heritages, aes(label = heritageLabel, fontface = "bold")) +
  annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
           col="black", cex=6,
           fontface = "bold", alpha = 0.8)
# As above with the new map: check_overlap drops some labels entirely, so this
# variant is less suitable.
p2 + geom_text(data = World_Heritages, aes(label = heritageLabel, fontface = "bold"), check_overlap=TRUE) +
  annotate("text", x = 10, y = 55, label = "Weltkulturerbestätten und Weltnaturerbestätten in Deutschland",
           col="black", cex=6,
           fontface = "bold", alpha = 0.8)
#Zusätzliches zur Wikidata Query:
#Falls man die Jahre & Bilder der heritages mit hinzu nehmen möchte verliert man einen Großteil der Ergebnisse, da diese nicht überall eingepflegt sind.
#Das wäre dann die zugehörige Query für Wikidata:
# Same query extended by inception year (?date) and image (?image). Sites
# missing either property drop out of the result set, which is why this
# variant is not used above. Query string hoisted to a variable, content
# byte-identical.
heritage_query_extended <- '#List of World Heritages in Germany
#defaultView:Map
SELECT DISTINCT ?heritage ?heritageLabel (YEAR(?date) as ?year) ?image ?lat ?lon
WHERE
{
?heritage wdt:P1435 wd:Q9259 ;
wdt:P17 wd:Q183 ;
wdt:P18 ?image ;
wdt:P571 ?date ;
wdt:P625 ?coord .
?heritage p:P625 ?coordinate .
?coordinate psv:P625 ?coordinate_node .
?coordinate_node wikibase:geoLatitude ?lat .
?coordinate_node wikibase:geoLongitude ?lon .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en,de". }
}
ORDER BY ?date'
World_Heritages <- query_wikidata(heritage_query_extended)
## 9 rows were returned by WDQS
#Falls man die Bezeichnungen für die Weltkulturerbe nur auf Deutsch bzw. Englisch möchte, verliert man teils Bezeichnungen von Entities und ersetzt diese durch kryptische Q-entities aus Wikidata.
#Daher wird neben "de" auch "en" in die Query aufgenommen.
require(tidyverse)
require(reshape2)
## Loading required package: reshape2
## Warning: package 'reshape2' was built under R version 3.2.5
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
require(ggplot2)
require(directlabels)
## Loading required package: directlabels
## Warning: package 'directlabels' was built under R version 3.2.5
# Load the 2016/17 Bundesliga results from football-data.co.uk.
bundesligatable <- read.csv2("http://www.football-data.co.uk/mmz4281/1617/D1.csv" , sep=",", header=TRUE, stringsAsFactors = FALSE)
# Keep columns 2-7 (includes Date, HomeTeam, AwayTeam and FTR used below).
bundesligatable2 <- select(bundesligatable,c(2:7))
# Long format: columns 2 and 3 (the two team columns) are stacked, giving one
# row per match and side (time 1 = home perspective, time 2 = away).
newBundesligatable<-reshape(bundesligatable2,dir ="long",varying = list(c(2,3)))
# Points per row: 3 for a win from that side's perspective, 1 for a draw.
# `df$col[cond] <- x` (rather than `df[cond, ]$col <- x`) stays valid even when
# the condition matches zero rows, e.g. with a different season's data.
newBundesligatable$punkte <- 0
newBundesligatable$punkte[newBundesligatable$FTR == 'H' & newBundesligatable$time == 1] <- 3
newBundesligatable$punkte[newBundesligatable$FTR == 'A' & newBundesligatable$time == 2] <- 3
newBundesligatable$punkte[newBundesligatable$FTR == 'D'] <- 1
# All club names (the stacked column contains home and away teams).
clublist <- unique(newBundesligatable$HomeTeam)
# Cumulative points per matchday for one club.
#
# @param name Club name as it appears in the (stacked) HomeTeam column.
# @param data Long-format results table; defaults to the global
#   newBundesligatable for backward compatibility with existing callers.
# @return Data frame with HomeTeam, punkte, Date, punkteSpieltag (running
#   point total) and spieltag (matchday index).
getClubData <- function(name, data = newBundesligatable) {
  data %>%
    select(HomeTeam, punkte, Date) %>%
    filter(HomeTeam == name) %>%
    mutate(punkteSpieltag = cumsum(punkte)) %>%
    mutate(spieltag = row_number(HomeTeam))
}
# Append one club's running point totals to df as a new column named after the
# club. Direct `[[<-` assignment replaces the original mutate-then-rename
# detour (which first created a column literally called "clubName").
#
# @param df Wide table with one row per matchday (spieltag).
# @param clubName Club whose cumulative points should be added.
# @return df with one additional column; assumes getClubData() returns the
#   matchdays in the same order as df's rows.
createFinalTable <- function(df, clubName) {
  df[[clubName]] <- getClubData(clubName)$punkteSpieltag
  df
}
# Seed the wide table with the matchday column of the first club, then append
# one cumulative-points column per club.
firstClub <- getClubData(clublist[1])
df <- select(firstClub, spieltag)
for (club in clublist) {
  df <- createFinalTable(df, club)
}
# Long format for ggplot: one row per (matchday, club); `variable` holds the
# club, `value` the cumulative points.
meltDF<-melt(df, id ="spieltag")
# Club -> line colour lookup table. One vectorised match replaces eighteen
# `meltDF[cond, ]$teamcolor <- x` assignments, each of which errors when a
# club is absent from the data (e.g. a different season).
team_colors <- c(
  'Bayern Munich' = 'red',
  'Augsburg' = 'red1',
  'Dortmund' = 'yellow',
  'Ein Frankfurt' = 'red2',
  'FC Koln' = 'firebrick',
  "M'gladbach" = 'green2',
  'Hertha' = 'blue',
  'Hoffenheim' = 'blue1',
  'Schalke 04' = 'blue2',
  'Darmstadt' = 'blue3',
  'Freiburg' = 'black',
  'Ingolstadt' = 'red4',
  'Leverkusen' = 'green3',
  'RB Leipzig' = 'blue4',
  'Wolfsburg' = 'green1',
  'Mainz' = 'red1',
  'Werder Bremen' = 'green4',
  'Hamburg' = 'firebrick2'
)
meltDF$teamcolor <- unname(team_colors[as.character(meltDF$variable)])
# Clubs without an entry keep "" exactly as in the original initialisation.
meltDF$teamcolor[is.na(meltDF$teamcolor)] <- ""
# First-occurrence club/colour pairs, in column order — feeds scale_colour_manual.
colmapping=unique(meltDF[c("variable","teamcolor")])
# Line chart of cumulative points per club over the season.
# NOTE(review): aes(frame = spieltag) was intended for gganimate (see the note
# at the end of the file) and is unused by plain ggplot here.
g<-ggplot(data=meltDF,
aes(x=spieltag, y=value, color=variable, group=variable, frame=spieltag)) +
geom_line()+
# One colour per club, taken from colmapping in factor-level order.
scale_colour_manual("variable",values = c(colmapping$teamcolor))+
scale_x_continuous(breaks = seq(1, 34, 1),limits = c(0,36),sec.axis = dup_axis(name = waiver()))+
# y breaks span the point range of the final matchday (row 34 of the wide df).
scale_y_continuous(breaks = seq(min(df[34,]), max(df[34,]), 1),position = "right")+
expand_limits(x = 1, y = 0)+
# Club name at the right-hand end of each line (directlabels).
geom_dl(aes(label = variable), method = list(dl.trans(x = x + 0.2), "last.points", cex = 0.6))+ ylab('Punkte nach dem letzten Spieltag')
g<-g+coord_fixed(ratio=0.3)
g
# Same data drawn as points, then split into one small facet per matchday —
# a per-Spieltag snapshot of the standings.
g2<-ggplot(data=meltDF,
aes(x=spieltag, y=value, color=variable, group=variable, frame=spieltag)) +
geom_point()+
scale_colour_manual("variable",values = c(colmapping$teamcolor))+
scale_x_continuous(breaks = seq(1, 34, 1),limits = c(0,36),sec.axis = dup_axis(name = waiver()))+
# y breaks span the point range of the final matchday (row 34 of the wide df).
scale_y_continuous(breaks = seq(min(df[34,]), max(df[34,]), 1),position = "right")+
expand_limits(x = 1, y = 0)
g2<-g2+coord_fixed(ratio=0.3)
g2+facet_wrap(~spieltag)
gganimate bekamen wir leider nicht zum Laufen.